import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import statsmodels.api as sm
# Load the life-expectancy dataset from disk and preview its first five rows.
LifeExpectancy = pd.read_csv(filepath_or_buffer="Life Expectancy.csv")
LifeExpectancy.head(n=5)
Country | Region | Year | Infant_deaths | Under_five_deaths | Adult_mortality | Alcohol_consumption | Hepatitis_B | Measles | BMI | ... | Diphtheria | Incidents_HIV | GDP_per_capita | Population_mln | Thinness_ten_nineteen_years | Thinness_five_nine_years | Schooling | Economy_status_Developed | Economy_status_Developing | Life_expectancy | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | Turkiye | Middle East | 2015 | 11.1 | 13.0 | 105.8240 | 1.32 | 97 | 65 | 27.8 | ... | 97 | 0.08 | 11006 | 78.53 | 4.9 | 4.8 | 7.8 | 0 | 1 | 76.5 |
1 | Spain | European Union | 2015 | 2.7 | 3.3 | 57.9025 | 10.35 | 97 | 94 | 26.0 | ... | 97 | 0.09 | 25742 | 46.44 | 0.6 | 0.5 | 9.7 | 1 | 0 | 82.8 |
2 | India | Asia | 2007 | 51.5 | 67.9 | 201.0765 | 1.57 | 60 | 35 | 21.2 | ... | 64 | 0.13 | 1076 | 1183.21 | 27.1 | 28.0 | 5.0 | 0 | 1 | 65.4 |
3 | Guyana | South America | 2006 | 32.8 | 40.5 | 222.1965 | 5.68 | 93 | 74 | 25.3 | ... | 93 | 0.79 | 4146 | 0.75 | 5.7 | 5.5 | 7.9 | 0 | 1 | 67.0 |
4 | Israel | Middle East | 2012 | 3.4 | 4.3 | 57.9510 | 2.89 | 97 | 89 | 27.0 | ... | 94 | 0.08 | 33995 | 7.91 | 1.2 | 1.1 | 12.8 | 1 | 0 | 81.7 |
5 rows × 21 columns
# Show every column label in the dataset (the head() preview elides some).
LifeExpectancy.columns
Index(['Country', 'Region', 'Year', 'Infant_deaths', 'Under_five_deaths', 'Adult_mortality', 'Alcohol_consumption', 'Hepatitis_B', 'Measles', 'BMI', 'Polio', 'Diphtheria', 'Incidents_HIV', 'GDP_per_capita', 'Population_mln', 'Thinness_ten_nineteen_years', 'Thinness_five_nine_years', 'Schooling', 'Economy_status_Developed', 'Economy_status_Developing', 'Life_expectancy'], dtype='object')
# Distinct region labels present in the dataset.
LifeExpectancy["Region"].unique()


def _yearly_mean(frame):
    """Return the mean Life_expectancy per Year for *frame*, indexed by Year."""
    return frame.groupby("Year")[["Life_expectancy"]].mean()


# One sub-frame per region of interest.
NorthAmerica = LifeExpectancy.loc[LifeExpectancy["Region"] == "North America"]
EuropeanUnion = LifeExpectancy.loc[LifeExpectancy["Region"] == "European Union"]
Asiaa = LifeExpectancy.loc[LifeExpectancy["Region"] == "Asia"]
Africaa = LifeExpectancy.loc[LifeExpectancy["Region"] == "Africa"]

# Regional tables carry Year as an ordinary column; the all-regions table
# (AVG_LE) keeps Year as its index.
NA = _yearly_mean(NorthAmerica).reset_index()
EU = _yearly_mean(EuropeanUnion).reset_index()
AS = _yearly_mean(Asiaa).reset_index()
AF = _yearly_mean(Africaa).reset_index()
AVG_LE = _yearly_mean(LifeExpectancy)

# Shared x axis (taken from the North America table) and one y series per line.
x = NA["Year"]
y_1 = NA["Life_expectancy"]
y_2 = EU["Life_expectancy"]
y_3 = AS["Life_expectancy"]
y_4 = AF["Life_expectancy"]
y_5 = AVG_LE["Life_expectancy"]

for _series, _label in (
    (y_1, "North America"),
    (y_2, "EU"),
    (y_3, "Asia"),
    (y_4, "Africa"),
    (y_5, "World Average"),
):
    plt.plot(x, _series, label=_label)

plt.legend(loc="lower right", fontsize="8")
plt.xlabel("Years")
plt.ylabel("Life Expectancy")
plt.title("Average Life Expectancy in Different Regions from 2000-2015 ")
plt.show()
# Scatter of life expectancy against adult mortality across every row.
mortality_vs_life = LifeExpectancy[["Adult_mortality", "Life_expectancy"]]
mortality_vs_life.plot(kind="scatter", x="Adult_mortality", y="Life_expectancy")
# Replace the default column-name axis labels with readable ones.
plt.ylabel("Life Expectancy")
plt.xlabel("Adult Mortality")
plt.title("Relationship Between Life Expectancy and Adult Mortality ")
Text(0.5, 1.0, 'Relationship Between Life Expectancy and Adult Mortality ')
# Region of the row(s) whose adult mortality equals the dataset maximum.
LifeExpectancy.loc[
    LifeExpectancy["Adult_mortality"] == LifeExpectancy["Adult_mortality"].max(),
    "Region",
]
2515 Africa Name: Region, dtype: object
# Mean life expectancy over all rows labelled "Africa".
LifeExpectancy.loc[LifeExpectancy["Region"] == "Africa", "Life_expectancy"].mean()
57.84730392156862
# Region of the row(s) whose adult mortality equals the dataset minimum.
LifeExpectancy.loc[
    LifeExpectancy["Adult_mortality"] == LifeExpectancy["Adult_mortality"].min(),
    "Region",
]
1613 Rest of Europe Name: Region, dtype: object
# Mean life expectancy over all rows labelled "Rest of Europe".
LifeExpectancy.loc[
    LifeExpectancy["Region"] == "Rest of Europe", "Life_expectancy"
].mean()
74.52541666666667
# Region of the row(s) with the lowest adult mortality (this repeats a query
# already run earlier in the notebook).
LifeExpectancy.loc[
    LifeExpectancy["Adult_mortality"] == LifeExpectancy["Adult_mortality"].min(),
    "Region",
]
1613 Rest of Europe Name: Region, dtype: object
# Bar chart of the mean life expectancy of each region.
region_means = LifeExpectancy.groupby("Region")[["Life_expectancy"]].mean()
region_means.plot(kind="bar")
plt.title("Mean Life Expectancy in Different Regions of the World")
plt.xlabel("Regions of the World")
plt.ylabel("Life Expectancy")
plt.show()
# Simple OLS: life expectancy (response) on GDP per capita (regressor).
GDP = LifeExpectancy.loc[:, "GDP_per_capita"]
LifeExp = LifeExpectancy.loc[:, "Life_expectancy"]
# add_constant supplies the intercept column, which OLS does not add itself.
Constant = sm.add_constant(GDP)
gdp_model = sm.OLS(endog=LifeExp, exog=Constant)
gdp_model.fit().summary()
# Fitted line (see summary): Life_expectancy = 65.1186 + 0.0003 * GDP_per_capita
## Reverse regression (GDP on life expectancy): GDP = -6.075E04 + 1049.8525 * LifeExp
Dep. Variable: | Life_expectancy | R-squared: | 0.340 |
---|---|---|---|
Model: | OLS | Adj. R-squared: | 0.340 |
Method: | Least Squares | F-statistic: | 1474. |
Date: | Sat, 27 Jul 2024 | Prob (F-statistic): | 1.52e-260 |
Time: | 20:54:40 | Log-Likelihood: | -9887.4 |
No. Observations: | 2864 | AIC: | 1.978e+04 |
Df Residuals: | 2862 | BIC: | 1.979e+04 |
Df Model: | 1 | ||
Covariance Type: | nonrobust |
coef | std err | t | P>|t| | [0.025 | 0.975] | |
---|---|---|---|---|---|---|
const | 65.1186 | 0.173 | 376.787 | 0.000 | 64.780 | 65.457 |
GDP_per_capita | 0.0003 | 8.43e-06 | 38.397 | 0.000 | 0.000 | 0.000 |
Omnibus: | 323.832 | Durbin-Watson: | 2.005 |
---|---|---|---|
Prob(Omnibus): | 0.000 | Jarque-Bera (JB): | 443.462 |
Skew: | -0.961 | Prob(JB): | 5.05e-97 |
Kurtosis: | 3.158 | Cond. No. | 2.48e+04 |
# Scatter of life expectancy against GDP per capita, overlaid with the
# simple-regression line Life_expectancy = b0 + b1 * GDP_per_capita.
GDP1 = LifeExpectancy["GDP_per_capita"]
LifeExp1 = LifeExpectancy["Life_expectancy"]

# Take the coefficients from the fit itself instead of typing in the values
# printed by summary(): the slope is displayed rounded to 0.0003, and that
# rounding error is multiplied by GDP values in the tens of thousands, which
# visibly pulls the drawn line away from the actual least-squares fit.
gdp_fit = sm.OLS(LifeExp1, sm.add_constant(GDP1)).fit()
intercept = gdp_fit.params["const"]
slope = gdp_fit.params["GDP_per_capita"]

# Evaluate the line across the observed GDP range only.
X = np.linspace(GDP1.min(), GDP1.max())
Y = intercept + slope * X

plt.scatter(GDP1, LifeExp1, alpha=0.3)
plt.plot(X, Y, "r")
plt.ylabel("Life Expectancy")
plt.xlabel("GDP per Capita")
plt.title("Relationship Between Life Expectancy and GDP per Capita")
plt.show()
# Simple OLS: life expectancy (response) on years of schooling (regressor).
School1 = LifeExpectancy.loc[:, "Schooling"]
Life1 = LifeExpectancy.loc[:, "Life_expectancy"]
# add_constant supplies the intercept column, which OLS does not add itself.
Const = sm.add_constant(School1)
schooling_model = sm.OLS(endog=Life1, exog=Const)
schooling_model.fit().summary()
# Life = 52.2771 + 2.1723 * School
Dep. Variable: | Life_expectancy | R-squared: | 0.537 |
---|---|---|---|
Model: | OLS | Adj. R-squared: | 0.536 |
Method: | Least Squares | F-statistic: | 3313. |
Date: | Sat, 27 Jul 2024 | Prob (F-statistic): | 0.00 |
Time: | 20:54:44 | Log-Likelihood: | -9381.2 |
No. Observations: | 2864 | AIC: | 1.877e+04 |
Df Residuals: | 2862 | BIC: | 1.878e+04 |
Df Model: | 1 | ||
Covariance Type: | nonrobust |
coef | std err | t | P>|t| | [0.025 | 0.975] | |
---|---|---|---|---|---|---|
const | 52.2771 | 0.312 | 167.609 | 0.000 | 51.666 | 52.889 |
Schooling | 2.1723 | 0.038 | 57.560 | 0.000 | 2.098 | 2.246 |
Omnibus: | 258.863 | Durbin-Watson: | 1.985 |
---|---|---|---|
Prob(Omnibus): | 0.000 | Jarque-Bera (JB): | 333.670 |
Skew: | -0.786 | Prob(JB): | 3.50e-73 |
Kurtosis: | 3.570 | Cond. No. | 21.8 |
# Scatter of life expectancy against schooling with the regression line
# Life = 52.2771 + 2.1723 * School drawn on top.
School1 = LifeExpectancy.loc[:, "Schooling"]
Life1 = LifeExpectancy.loc[:, "Life_expectancy"]

# Line evaluated on a fixed 0-16 schooling grid (50 points, linspace default).
X = np.linspace(0, 16, num=50)
Y = 52.2771 + 2.1723 * X

plt.scatter(School1, Life1, alpha=0.3)
plt.plot(X, Y, "r")
plt.title("Relationship Between Schooling and Life Expectancy")
plt.xlabel("Years of Schooling")
plt.ylabel("Life Expectancy")
plt.show()
# Split rows by the two economy-status indicator columns.
Developed = LifeExpectancy.loc[LifeExpectancy["Economy_status_Developed"].eq(1)]
Developing = LifeExpectancy.loc[LifeExpectancy["Economy_status_Developing"].eq(1)]

# HIV incidence (x) and life expectancy (y) for each group.
# NOTE: y_1 and y_2 rebind names that earlier held regional yearly means.
x_1, y_1 = Developed["Incidents_HIV"], Developed["Life_expectancy"]
x_2, y_2 = Developing["Incidents_HIV"], Developing["Life_expectancy"]

plt.scatter(x_1, y_1)
plt.title("Life Expectancy and HIV Rates in Developed Countries")
plt.xlabel("Percentage of Population with HIV")
plt.ylabel("Life Expectancy")

# Hard-coded straight line over 0-0.3; the regression these coefficients come
# from is not shown in this file (presumably a fit on the Developed rows).
HIV = np.linspace(0, 0.3, num=50)
DLE = 78.3592 + 1.9529 * HIV
plt.plot(HIV, DLE, "r")
[<matplotlib.lines.Line2D at 0x1519e1b50>]
# Scatter of life expectancy against HIV incidence for the Developing rows.
plt.scatter(x_2, y_2)
plt.title("Life Expectancy and HIV Rate in Developing Countries")
plt.xlabel("Percentage of Population with HIV")
plt.ylabel("Life Expectancy")

# Hard-coded straight line over 0-22; the regression these coefficients come
# from is not shown in this file (presumably a fit on the Developing rows).
HIV = np.linspace(0, 22, num=50)
ULE = 68.4233 - 1.8791 * HIV
plt.plot(HIV, ULE, "r")
[<matplotlib.lines.Line2D at 0x151aa1f10>]
def _average_yearly_change(years, values):
    """Return the mean of the successive slopes of *values* over *years*."""
    return np.mean(np.diff(values) / np.diff(years))


# Read each series straight from its yearly-mean table. The short names
# y_1 and y_2 are rebound further up to the per-row Developed/Developing
# life-expectancy series, so using them here would compute the wrong
# quantity and mismatch the length of the year axis.
Average_Slope_America = _average_yearly_change(NA["Year"], NA["Life_expectancy"])
Average_Slope_Europe = _average_yearly_change(EU["Year"], EU["Life_expectancy"])
Average_Slope_Asia = _average_yearly_change(AS["Year"], AS["Life_expectancy"])
Average_Slope_Africa = _average_yearly_change(AF["Year"], AF["Life_expectancy"])
# AVG_LE keeps Year as its index rather than as a column.
Average_Slope_World = _average_yearly_change(
    AVG_LE.index.to_numpy(), AVG_LE["Life_expectancy"]
)
print(Average_Slope_America, Average_Slope_Europe, Average_Slope_Asia,
      Average_Slope_Africa, Average_Slope_World)
# OLS of life expectancy on Year plus one indicator column per country.
# drop_first=True omits the first country so the indicators are not collinear
# with the intercept; that country becomes the baseline absorbed by `const`.
# dtype=float keeps the indicators numeric: pandas 2.x returns bool columns by
# default, and a bool/int mix is converted to an object array that
# statsmodels refuses to fit.
Dummy = pd.get_dummies(LifeExpectancy["Country"], drop_first=True, dtype=float)
LE = LifeExpectancy["Life_expectancy"]
YR = Dummy.join(LifeExpectancy["Year"])
constant = sm.add_constant(YR)
sm.OLS(LE, constant).fit().summary()
# Pooled simple OLS: life expectancy (response) on calendar year (regressor).
X = LifeExpectancy.loc[:, "Year"]
Y = LifeExpectancy.loc[:, "Life_expectancy"]
# add_constant supplies the intercept column, which OLS does not add itself.
const = sm.add_constant(X)
year_model = sm.OLS(endog=Y, exog=const)
year_model.fit().summary()
Dep. Variable: | Life_expectancy | R-squared: | 0.030 |
---|---|---|---|
Model: | OLS | Adj. R-squared: | 0.030 |
Method: | Least Squares | F-statistic: | 89.74 |
Date: | Sat, 27 Jul 2024 | Prob (F-statistic): | 5.51e-21 |
Time: | 20:55:03 | Log-Likelihood: | -10438. |
No. Observations: | 2864 | AIC: | 2.088e+04 |
Df Residuals: | 2862 | BIC: | 2.089e+04 |
Df Model: | 1 | ||
Covariance Type: | nonrobust |
coef | std err | t | P>|t| | [0.025 | 0.975] | |
---|---|---|---|---|---|---|
const | -645.1984 | 75.379 | -8.559 | 0.000 | -793.001 | -497.396 |
Year | 0.3557 | 0.038 | 9.473 | 0.000 | 0.282 | 0.429 |
Omnibus: | 226.336 | Durbin-Watson: | 1.997 |
---|---|---|---|
Prob(Omnibus): | 0.000 | Jarque-Bera (JB): | 262.456 |
Skew: | -0.718 | Prob(JB): | 1.02e-57 |
Kurtosis: | 2.628 | Cond. No. | 8.74e+05 |
# Straight-line extrapolation of life expectancy over 2016-2026.
# The slope 0.3557 and the intercept -645.1984 match the Year-only regression
# summary. The intercept -654.3982 and the per-country offsets are hard-coded;
# presumably they come from the country-indicator regression, whose summary
# is not shown here. TODO confirm.
Year = np.linspace(2016, 2026, num=50)
YEAR_SLOPE = 0.3557
COUNTRY_MODEL_INTERCEPT = -654.3982

LE_USA = COUNTRY_MODEL_INTERCEPT + 18.2437 + YEAR_SLOPE * Year
LE_World = -645.1984 + YEAR_SLOPE * Year
LE_CAR = COUNTRY_MODEL_INTERCEPT + YEAR_SLOPE * Year - 13.1750
LE_JAP = COUNTRY_MODEL_INTERCEPT + YEAR_SLOPE * Year + 22.8000

for _forecast, _label in (
    (LE_USA, "USA"),
    (LE_World, "World Average"),
    (LE_CAR, "Central African Republic"),
    (LE_JAP, "Japan"),
):
    plt.plot(Year, _forecast, label=_label)

plt.title("Predicted Life Expectancy for 2016-2026")
plt.xlabel("Years")
plt.ylabel("Life Expectancy")
# Anchor the legend's lower-left corner at the right edge of the axes so it
# sits outside the plotting area.
plt.legend(loc="lower left", fontsize="8", bbox_to_anchor=(1, 0.5))
plt.show()